import sys
import os
import re
import codecs
from tqdm import tqdm

# Example wav.scp line (key ending in "_rir", whitespace, then the wav path):
# 00000290052_rir /home/gaoxinglong/env/data/south3k/gudo/0000029/00000290052.wav_rir.wav


def main(new_wav_scp, raw_text):
    """Write transcripts for the ``_rir`` entries of a wav.scp file.

    ``raw_text`` is read as lines of ``<key> <transcript>`` separated by
    whitespace. Each line of ``new_wav_scp`` is split into a key and the
    rest of the line; keys ending in ``_rir`` are mapped back to their
    source key and, when that source key has a transcript, a line
    ``<rir key><TAB><transcript>`` is written to ``<raw_text>.rir.text``.

    Lines in either input that do not contain two whitespace-separated
    fields are skipped.

    Args:
        new_wav_scp: Path of the wav.scp file listing the ``_rir`` keys.
        raw_text: Path of the transcript file for the source keys; the
            output path is derived from it by appending ``.rir.text``.
    """
    suffix = '_rir'

    transcripts = dict()
    with codecs.open(raw_text, 'r') as raw:
        for line in tqdm(raw):
            fields = re.split(r'\s+', line.strip(), maxsplit=1)
            # Blank or key-only lines carry no transcript; skip them
            # instead of failing on the two-value unpack.
            if len(fields) != 2:
                continue
            key, value = fields
            transcripts[key] = value

    rir_text = '{}.rir.text'.format(raw_text)
    with codecs.open(new_wav_scp, 'r') as f, codecs.open(rir_text, 'w') as tf:
        # readlines() hands tqdm a sized list rather than a bare iterator.
        for entry in tqdm(f.readlines()):
            tokens = re.split(r'\s+', entry.strip(), maxsplit=1)
            if len(tokens) != 2:
                continue
            rir_key = tokens[0]
            if not rir_key.endswith(suffix):
                continue
            # Strip only the trailing suffix so source keys that contain
            # underscores themselves are preserved intact.
            source_key = rir_key[:-len(suffix)]
            if source_key not in transcripts:
                # Fall back to the prefix before the first underscore,
                # which is how source keys were derived previously.
                source_key = rir_key.split('_', 1)[0]
            if source_key in transcripts:
                tf.write('{}\t{}\n'.format(rir_key, transcripts[source_key]))


if __name__ == '__main__':
    # Two positional arguments are required: sys.argv[1] is the wav.scp
    # path and sys.argv[2] is the transcript text path.
    if len(sys.argv) < 3:
        # Two placeholders, two arguments: script name and expected operands.
        print('usage: {} {}\n'.format(sys.argv[0], 'wav.scp text'))
        sys.exit(-1)
    wav_scp = sys.argv[1]
    raw_text = sys.argv[2]
    main(wav_scp, raw_text=raw_text)
